##Where's Waldo: Searching for the Hidden Variable of "Real" Corruption
##By: Chiara Superti and Jennifer Pan
##R CODE FOR SECTION 1 AND 2 OF REPLICATION PAPER
############################################################################
############################################################################


############################################################################
##SETUP: Load needed packages
############################################################################

library(foreign)
library(Zelig)
library(MASS)
library(xtable)
library(car)
library(pls)


##SETUP: Load data
extension.data <-read.dta("crime_WES_ICVS.dta")
extension.data82 <- extension.data[extension.data$indepdate<1982,]  #get countries indpt after 1982
attach(extension.data82)
sort(isocode)

extension.data82.No.CHE <- subset(extension.data82,isocode!="CHE")  #take out Switzerland
extension.data82.No.CHE$isocode
detach(extension.data82)
attach(extension.data82.No.CHE)
#names(extension.data82.No.CHE)


##SETUP: Renaming data to make it easier to manage
#Data from UNODC
totcrimes <-grandtotalofrecordedcrimes
totdrug <-totalrecordeddrugoffenses 
totbribe <-totalrecordedbriberycrimes
totembez <-totalrecordedembezzlements
totfraud <-totalrecordedfrauds
tothom <-totalrecordedintentionalhomicide
totkid <-totalrecordedkidnappings
totprosec_int_Hom <-totalprosecutedforintentionalhom
totincar <- totalpersonsincarcerated
totpros <- totalmalesprosecuted+totalfemalesprosecuted
totcorr <- totbribe+totembez
totcorsc <- (totcorr-mean(totcorr,na.rm=TRUE))/sd(totcorr,na.rm=TRUE)

#Weighted data from UNODC
weighted_corr <- ((totbribe+totembez)*(totcrimes-mean(totcrimes,na.rm=TRUE)/sd(totcrimes,na.rm=TRUE)))/10000000
weighted_corrsc <- (weighted_corr-mean(weighted_corr,na.rm=TRUE))/sd(weighted_corr,na.rm=TRUE)
totsolve <- totincar/totcrimes
logtotsolve <- log(totsolve)

#Data from WEF (Global Competitiveness Report)
#Averaged responses for 2001-02, 2002-03, 2004-2005
publ_trust <- (public_trust0405+public_trust0203+public_trust0102)/3
imp_exp_corr <- (importexport0405+importexport0203+importexport0102)/3
publi_cont_corr <- (public_contract0203+public_contract0102+public_contract0405)/3
bus_cost_corr <- (business_costs0203+business_costs0102+business_costs0405)/3


############################################################################
##NON PARAMETRIC ANALYSIS OF ALL MEASURES (on new merged dataset)
##For the beginning
############################################################################
##KKM
dataplot<-na.omit(cbind(kkm0205,lavgten_exec8201))
pdf("KKMlowess.pdf")
plot(lavgten_exec8201,kkm0205, xlab="Executive Tenure", ylab="KKM", main="KKM and Stability")
lines(lowess(dataplot[,2],dataplot[,1]), col="blue", lwd=2)
dev.off()
pdf("KKMhist.pdf")
hist(kkm0205, col="lightblue", xlab="KKM", main="KKM")
dev.off()

##CPI
dataplot<-na.omit(cbind(cpi0205,lavgten_exec8201))
pdf("CPIlowess.pdf")
plot(lavgten_exec8201,cpi0205, xlab="Executive Tenure", ylab="CPI", main="CPI and Stability")
lines(lowess(dataplot[,2],dataplot[,1]), col="blue", lwd=2)
dev.off()
pdf("CPIhist.pdf")
hist(cpi0205, col="lightblue", xlab="CPI", main="CPI")
dev.off()

##ICRG
dataplot<-na.omit(cbind(icrg0205,lavgten_exec8201))
pdf("ICRGlowess.pdf")
plot(lavgten_exec8201, icrg0205, xlab="Executive Tenure", ylab="ICRG", main="ICRG and Stability")
lines(lowess(dataplot[,2],dataplot[,1]), col="blue", lwd=2)
dev.off()
pdf("ICRGhist.pdf")
hist(icrg0205, col="lightblue", xlab="ICRG", main="ICRG")
dev.off()

##UNODC (unweighted)
dataplot<-na.omit(cbind(totcorsc,lavgten_exec8201))
pdf("UNODClowess.pdf")
plot(lavgten_exec8201, totcorsc, xlab="Executive Tenure", ylab="UNODC", main="UNODC and Stability")
lines(lowess(dataplot[,2],dataplot[,1]), col="forestgreen", lwd=2)
dev.off()
pdf("UNODChist.pdf")
hist(totcorsc, col="lightgreen", xlab="UNODC", main="UNODC")
dev.off()

##UNODCw (weighted, w/ scaling for graphical purposes only)
dataplot<-na.omit(cbind(weighted_corrsc,lavgten_exec8201))
pdf("UNODCwlowess.pdf")
plot(lavgten_exec8201, weighted_corrsc, xlab="Executive Tenure", ylab="UNODCw", main="UNODCw and Stability")
lines(lowess(dataplot[,2],dataplot[,1]), col="forestgreen", lwd=2)
dev.off()
pdf("UNODCwhist.pdf")
hist(weighted_corrsc, col="lightgreen", xlab="UNODCw", main="UNODCw")
dev.off()

##WEFie
dataplot<-na.omit(cbind(imp_exp_corr,lavgten_exec8201))
pdf("WEFielowess.pdf")
plot(lavgten_exec8201, imp_exp_corr, xlab="Executive Tenure", ylab="WEFie", main="WEFie and Stability")
lines(lowess(dataplot[,2],dataplot[,1]), col="orange", lwd=2)
dev.off()
pdf("WEFiehist.pdf")
hist(imp_exp_corr, col="goldenrod", xlab="WEFie", main="WEFie")
dev.off()

##WEFpc
dataplot<-na.omit(cbind(publi_cont_corr,lavgten_exec8201))
pdf("WEFpclowess.pdf")
plot(lavgten_exec8201, publi_cont_corr, xlab="Executive Tenure", ylab="WEFpc", main="WEFpc and Stability")
lines(lowess(dataplot[,2],dataplot[,1]), col="orange", lwd=2)
dev.off()
pdf("WEFpchist.pdf")
hist(publi_cont_corr, col="goldenrod", xlab="WEFpc", main="WEFpc")
dev.off()

##WEFbc
dataplot<-na.omit(cbind(bus_cost_corr,lavgten_exec8201))
pdf("WEFbclowess.pdf")
plot(lavgten_exec8201, bus_cost_corr, xlab="Executive Tenure", ylab="WEFbc", main="WEFbc and Stability")
lines(lowess(dataplot[,2],dataplot[,1]), col="orange", lwd=2)
dev.off()
pdf("WEFbchist.pdf")
hist(bus_cost_corr, col="goldenrod", xlab="WEFbc", main="WEFbc")
dev.off()


############################################################################
##Test Campante Min-Spec. Cross-Sectional Reg Model using UNODC
############################################################################
##Minimum specification model w/ KKM as measure of corruption
minspec.KKM <-lm(kkm0205~lavgten_exec8201+lavgten_exec8201sq)
minspec.KKM.s <-summary(minspec.KKM)
minspec.KKM.s
WH_SE_minspec.KKM <-sqrt(diag(hccm(minspec.KKM,type="hc1")))
WH_SE_minspec.KKM

##Minimum specification model w/ UNODC unweighted
minspec.UNODC <- lm(totcorr~lavgten_exec8201+lavgten_exec8201sq)
minspec.UNODC.s <-summary(minspec.UNODC) 
minspec.UNODC.s
WH_SE_minspec.UNODC <-sqrt(diag(hccm(minspec.UNODC,type="hc1")))
WH_SE_minspec.UNODC

##Minimum specification model w/ UNODC weighted
minspec.UNODCw <- lm(weighted_corr~lavgten_exec8201+lavgten_exec8201sq)
minspec.UNODCw.s <-summary(minspec.UNODCw) 
minspec.UNODCw.s
WH_SE_minspec.UNODCw <-sqrt(diag(hccm(minspec.UNODCw,type="hc1")))
WH_SE_minspec.UNODCw

xtable(cbind(minspec.UNODC.s$coefficients[,1],WH_SE_minspec.UNODC,
  minspec.UNODCw.s$coefficients[,1],WH_SE_minspec.UNODCw,
  minspec.KKM.s$coefficients[,1],WH_SE_minspec.KKM), digits=3)



############################################################################
##Test Campante Model Using Ordered Categorical Model (KKM)
############################################################################
##Coarsen KKM into quatiles (25, 50, 75)
coars_kkm <- kkm0205
min(coars_kkm,na.rm=TRUE)
quantile(coars_kkm,.25,na.rm=TRUE)
quantile(coars_kkm,.50,na.rm=TRUE)
quantile(coars_kkm,.75,na.rm=TRUE)
coars_kkm <- recode(coars_kkm,"-2.5:-1.3='low';-1.3:-0.3='mediumlow';-0.3:0.5='mediumhigh';NA=NA;else='high'")
coars_kkm.fac <- as.factor(coars_kkm)

extension.data82.No.CHE.new2<-cbind(extension.data82.No.CHE,coars_kkm.fac)

reg.coars_kkm.fac <- zelig(formula=coars_kkm.fac~lavgten_exec8201+lwdigdppc8201sq+lwdigdppc8201+elf_eth+democ8201+
  imports8201+fuel8201+ores8201, data=extension.data82.No.CHE.new2, model="oprobit")
summary(reg.coars_kkm.fac)

min_exten <- setx(reg.coars_kkm.fac,lavgten_exec8201=min(lavgten_exec8201,na.rm=TRUE))  #Very Unstable
max_exten <- setx(reg.coars_kkm.fac,lavgten_exec8201=max(lavgten_exec8201,na.rm=TRUE))  #Very Stable
sim.out7 <- sim(reg.coars_kkm.fac,x=min_exten,x1=max_exten)
sim.out8 <- sim(reg.coars_kkm.fac,x=max_exten,x1=min_exten)

summary(sim.out7)
summary(sim.out8)

pdf("KKM_sim_Un.pdf")
plot(sim.out7)
dev.off()
pdf("KKM_sim_St.pdf")
plot(sim.out8)
dev.off()



############################################################################
##Test Campante Model Using Ordered Categorical Model (CPI)
############################################################################
coars_CPI <- cpi0205 
min(coars_CPI ,na.rm=TRUE)
quantile(coars_CPI ,.25,na.rm=TRUE)
quantile(coars_CPI  ,.50,na.rm=TRUE)
quantile(coars_CPI,.75,na.rm=TRUE)
max(coars_CPI ,na.rm=TRUE)
coars_CPI<- recode(coars_CPI ,"-2.34:-1.2='low';-1.2:0.36 ='mediumlow';0.361:0.9='mediumhigh';0.91:1.6='high'")
coars_CPI.fac <- as.factor(coars_CPI)

extension.data82.No.CHE.new6 <- cbind(extension.data82.No.CHE,coars_CPI.fac)
attach(extension.data82.No.CHE.new6)

reg.coars_CPI.fac<- zelig(formula=coars_CPI.fac~lavgten_exec8201+lwdigdppc8201sq+lwdigdppc8201+elf_eth+democ8201+
  imports8201+fuel8201+ores8201, data=extension.data82.No.CHE.new6, model="oprobit")
summary(reg.coars_CPI.fac)

min_exten <- setx(reg.coars_CPI.fac,lavgten_exec8201=min(lavgten_exec8201,na.rm=TRUE))
max_exten <- setx(reg.coars_CPI.fac,lavgten_exec8201=max(lavgten_exec8201,na.rm=TRUE))
sim.out17 <- sim(reg.coars_CPI.fac,x=min_exten,x1=max_exten)
sim.out18 <- sim(reg.coars_CPI.fac,x=max_exten,x1=min_exten)

pdf("CPIminstab.pdf")
plot(sim.out17)
dev.off()
pdf("CPImaxstab.pdf")
plot(sim.out18)
dev.off()

summary(sim.out17)
summary(sim.out18)


############################################################################
##Test Campante Model Using Ordered Categorical Model (UNODC)
############################################################################
##Coarsen tot_corr and weighted_corr into quatiles (20, 50, 75)
##tot_corr
coars_totcorr <- totcorr
min(coars_totcorr,na.rm=TRUE)
quantile(coars_totcorr,.25,na.rm=TRUE)
quantile(coars_totcorr,.50,na.rm=TRUE)
quantile(coars_totcorr,.75,na.rm=TRUE)
max(coars_totcorr,na.rm=TRUE)
coars_totcorr <- recode(coars_totcorr,"13.5:187.4='low';187.5:1167.6 ='mediumlow';1167.6 :5401.05='mediumhigh';5401.06:19472='high'")
coars_totcorr.fac <- as.factor(coars_totcorr)

extension.data82.No.CHE.new <- cbind(extension.data82.No.CHE,coars_totcorr,coars_totcorr.fac )
attach(extension.data82.No.CHE.new)

reg.coars_totcorr <- zelig(formula=coars_totcorr.fac~lavgten_exec8201+lwdigdppc8201sq+lwdigdppc8201+elf_eth+democ8201+
  imports8201+fuel8201+ores8201, data=extension.data82.No.CHE.new, model="oprobit")
summary(reg.coars_totcorr)

min_exten <- setx(reg.coars_totcorr,lavgten_exec8201=min(lavgten_exec8201,na.rm=TRUE))
max_exten <- setx(reg.coars_totcorr,lavgten_exec8201=max(lavgten_exec8201,na.rm=TRUE))
sim.out3 <- sim(reg.coars_totcorr,x=min_exten,x1=max_exten)
sim.out4 <- sim(reg.coars_totcorr,x=max_exten,x1=min_exten)

#plot(sim.out3)
#plot(sim.out4)
summary(sim.out3)
summary(sim.out4)

##weighted_corr
coars_weighted_corr <- weighted_corr
min(coars_weighted_corr,na.rm=TRUE)
quantile(coars_weighted_corr,.25,na.rm=TRUE)
quantile(coars_weighted_corr,.50,na.rm=TRUE)
quantile(coars_weighted_corr,.75,na.rm=TRUE)
max(coars_weighted_corr,na.rm=TRUE)
coars_weighted_corr <- recode(coars_weighted_corr,"0.015:1.127='low';1.128:10.99 ='mediumlow';10.991:288.607='mediumhigh';288.608:12242='high'")
coars_weighted_corr.fac <- as.factor(coars_weighted_corr)

extension.data82.No.CHE.new1 <- cbind(extension.data82.No.CHE.new,coars_weighted_corr,coars_weighted_corr.fac )
attach(extension.data82.No.CHE.new1)

reg.coars_weighted_corr.fac <- zelig(formula=coars_weighted_corr.fac~lavgten_exec8201+lwdigdppc8201sq+lwdigdppc8201+elf_eth+democ8201+
  imports8201+fuel8201+ores8201, data=extension.data82.No.CHE.new1, model="oprobit")
summary(reg.coars_weighted_corr.fac)

min_exten <- setx(reg.coars_weighted_corr.fac,lavgten_exec8201=min(lavgten_exec8201,na.rm=TRUE))
max_exten <- setx(reg.coars_weighted_corr.fac,lavgten_exec8201=max(lavgten_exec8201,na.rm=TRUE))
sim.out5 <- sim(reg.coars_weighted_corr.fac,x=min_exten,x1=max_exten)
sim.out6 <- sim(reg.coars_weighted_corr.fac,x=max_exten,x1=min_exten)

#plot(sim.out5)
#plot(sim.out6)
summary(sim.out5)
summary(sim.out6)


############################################################################
##Test Campante Model Using Ordered Categorical Model (WEF)
############################################################################
####Irregular payments associated with Import and Export licenses (WEFie)
coars_imp_exp_corr <-imp_exp_corr 
min(coars_imp_exp_corr,na.rm=TRUE)
quantile(coars_imp_exp_corr,.25,na.rm=TRUE)
quantile(coars_imp_exp_corr ,.50,na.rm=TRUE)
quantile(coars_imp_exp_corr ,.75,na.rm=TRUE)
max(coars_imp_exp_corr,na.rm=TRUE)

coars_imp_exp_corr <- recode(coars_imp_exp_corr,"3.1:4.06='high';4.07:5.17 ='mediumhigh';5.18:6.22='mediumlow';6.221:6.87='low'")
coars_imp_exp_corr.fac <- as.factor(coars_imp_exp_corr )

extension.data82.No.CHE.new2 <- cbind(extension.data82.No.CHE.new,coars_imp_exp_corr.fac)
attach(extension.data82.No.CHE.new2)

reg.coars_imp_exp_corr.fac<- zelig(formula=coars_imp_exp_corr.fac~lavgten_exec8201+lwdigdppc8201sq+lwdigdppc8201+elf_eth+democ8201+
  imports8201+fuel8201+ores8201, data=extension.data82.No.CHE.new2, model="oprobit")
summary(reg.coars_imp_exp_corr.fac)

min_exten <- setx(reg.coars_imp_exp_corr.fac,lavgten_exec8201=min(lavgten_exec8201,na.rm=TRUE))
max_exten <- setx(reg.coars_imp_exp_corr.fac,lavgten_exec8201=max(lavgten_exec8201,na.rm=TRUE))
sim.out9 <- sim(reg.coars_imp_exp_corr.fac,x=min_exten,x1=max_exten)
sim.out10 <- sim(reg.coars_imp_exp_corr.fac,x=max_exten,x1=min_exten)

pdf("WEFieminstab.pdf")
plot(sim.out9)
dev.off()
pdf("WEFiemaxstab.pdf")
plot(sim.out10)
dev.off()

summary(sim.out9)
summary(sim.out10)

#plot(sim.out19)
summary(sim.out19)
summary(sim.out20)

####Irregular payments associated with Public Contracts (WEFpc)
coars_publi_cont_corr <-publi_cont_corr
min(coars_publi_cont_corr,na.rm=TRUE)
quantile(coars_publi_cont_corr,.25,na.rm=TRUE)
quantile(coars_publi_cont_corr ,.50,na.rm=TRUE)
quantile(coars_publi_cont_corr ,.75,na.rm=TRUE)
max(coars_publi_cont_corr,na.rm=TRUE)
coars_publi_cont_corr<- recode(coars_publi_cont_corr,"2.78:3.61='high';3.62:4.3 ='mediumhigh';4.31:5.37='mediumlow';5.38:6.64='low'")
coars_publi_cont_corr.fac <- as.factor(coars_publi_cont_corr)

extension.data82.No.CHE.new3 <- cbind(extension.data82.No.CHE.new,coars_publi_cont_corr.fac)
attach(extension.data82.No.CHE.new3)

reg.coars_publi_cont_corr.fac<- zelig(formula=coars_publi_cont_corr.fac~lavgten_exec8201+lwdigdppc8201sq+lwdigdppc8201+elf_eth+democ8201+
  imports8201+fuel8201+ores8201, data=extension.data82.No.CHE.new3, model="oprobit")
summary(reg.coars_publi_cont_corr.fac)

min_exten <- setx(reg.coars_publi_cont_corr.fac,lavgten_exec8201=min(lavgten_exec8201,na.rm=TRUE))
max_exten <- setx(reg.coars_publi_cont_corr.fac,lavgten_exec8201=max(lavgten_exec8201,na.rm=TRUE))
sim.out11 <- sim(reg.coars_publi_cont_corr.fac,x=min_exten,x1=max_exten)
sim.out12 <- sim(reg.coars_publi_cont_corr.fac,x=max_exten,x1=min_exten)

pdf("WEFpcminstab.pdf")
plot(sim.out11)
dev.off()
pdf("WEFpcmaxstab.pdf")
plot(sim.out12)
dev.off()

summary(sim.out11)
summary(sim.out12)

####Business costs of corruption (WEFbc)
coars_bus_cost_corr <-bus_cost_corr 
min(coars_bus_cost_corr ,na.rm=TRUE)
quantile(coars_bus_cost_corr ,.25,na.rm=TRUE)
quantile(coars_bus_cost_corr  ,.50,na.rm=TRUE)
quantile(coars_bus_cost_corr ,.75,na.rm=TRUE)
max(coars_bus_cost_corr ,na.rm=TRUE)
coars_bus_cost_corr<- recode(coars_bus_cost_corr ,"3.1:4.12='high';4.12:4.72 ='mediumhigh';4.72:5.85='mediumlow';5.85:6.74='low'")
coars_bus_cost_corr.fac <- as.factor(coars_bus_cost_corr)

extension.data82.No.CHE.new5 <- cbind(extension.data82.No.CHE.new,coars_bus_cost_corr.fac)
attach(extension.data82.No.CHE.new5)

reg.coars_bus_cost_corr.fac<- zelig(formula=coars_bus_cost_corr.fac~lavgten_exec8201+lwdigdppc8201sq+lwdigdppc8201+elf_eth+democ8201+
  imports8201+fuel8201+ores8201, data=extension.data82.No.CHE.new5, model="oprobit")
summary(reg.coars_bus_cost_corr.fac)

min_exten <- setx(reg.coars_bus_cost_corr.fac,lavgten_exec8201=min(lavgten_exec8201,na.rm=TRUE))
max_exten <- setx(reg.coars_bus_cost_corr.fac,lavgten_exec8201=max(lavgten_exec8201,na.rm=TRUE))
sim.out15 <- sim(reg.coars_bus_cost_corr.fac,x=min_exten,x1=max_exten)
sim.out16 <- sim(reg.coars_bus_cost_corr.fac,x=max_exten,x1=min_exten)

pdf("BUS_COS_sim_Un.pdf")
plot(sim.out15)
dev.off()

pdf("BUS_COS_sim_St.pdf")
plot(sim.out16)
dev.off()
summary(sim.out15)
summary(sim.out16)